/***
This do-file creates 3 figures to benchmark the employment series in the
tracker agains ADP and CPS.
***/

* Set $root
project figstabs, root
if (r(buildrunning)==0) include "${root}/code/config_interactive.do"

* Set globals
project, uses("${root}/code/set_globals.do")
include "${root}/code/set_globals.do"
local category "Employment"

* Create required subfolders
cap mkdir "${root}/data/derived/CES"
cap mkdir "${root}/results/`category'"
cap mkdir "${root}/results/paper numbers"
cap mkdir "${root}/results/paper numbers/`category'"

*-------------------------------------------------------------------------------
**# Trends in employment: Paychex vs. ADP
*-------------------------------------------------------------------------------

* Import Employment data
project, uses("${root}/data/web/data/Employment - National - Weekly.csv")
import delimited "${root}/data/web/data/Employment - National - Weekly.csv", clear

gen date = mdy(month, day_endofweek, year)
format date %d

keep date emp_incq1 emp_incq2 emp_incq3 emp_incq4

* Rescale
forvalues q = 1/4 {
replace emp_incq`q' =  emp_incq`q' * 100
}
rename (emp_incq1 emp_incq2 emp_incq3 emp_incq4) (emp_combined_q1 emp_combined_q2 emp_combined_q3 emp_combined_q4)

* Add ADP by quartile
* Approximations of Figure 5A from https://www.brookings.edu/wp-content/uploads/2020/06/Cajner-et-al-Conference-Draft.pdf
* Employment changes are relative to Feb 1 2020
preserve

keep date
tsset date
tsfill

gen adp_q1 = .
replace adp_q1 = 0 if date == td(1feb2020)
replace adp_q1 = 0 if date == td(8feb2020)
replace adp_q1 = 0 if date == td(15feb2020)
replace adp_q1 = -0.01 if date == td(22feb2020)
replace adp_q1 = -0.01 if date == td(29feb2020)
replace adp_q1 = -0.02 if date == td(7mar2020)
replace adp_q1 = -0.05 if date == td(14mar2020)
replace adp_q1 = -0.13 if date == td(21mar2020)
replace adp_q1 = -0.24 if date == td(28mar2020)
replace adp_q1 = -0.32 if date == td(4apr2020)
replace adp_q1 = -0.36 if date == td(11apr2020)
replace adp_q1 = -0.38 if date == td(18apr2020)
replace adp_q1 = -0.38 if date == td(25apr2020)
replace adp_q1 = -0.36 if date == td(2may2020)
replace adp_q1 = -0.34 if date == td(9may2020)
replace adp_q1 = -0.31 if date == td(16may2020)

gen adp_q2 = .
replace adp_q2 = 0 if date == td(1feb2020)
replace adp_q2 = 0 if date == td(8feb2020)
replace adp_q2 = 0 if date == td(15feb2020)
replace adp_q2 = 0 if date == td(22feb2020)
replace adp_q2 = 0 if date == td(29feb2020)
replace adp_q2 = -0.01 if date == td(7mar2020)
replace adp_q2 = -0.04 if date == td(14mar2020)
replace adp_q2 = -0.09 if date == td(21mar2020)
replace adp_q2 = -0.15 if date == td(28mar2020)
replace adp_q2 = -0.19 if date == td(4apr2020)
replace adp_q2 = -0.22 if date == td(11apr2020)
replace adp_q2 = -0.23 if date == td(18apr2020)
replace adp_q2 = -0.22 if date == td(25apr2020)
replace adp_q2 = -0.21 if date == td(2may2020)
replace adp_q2 = -0.20 if date == td(9may2020)
replace adp_q2 = -0.17 if date == td(16may2020)

gen adp_q3 = .
replace adp_q3 = 0 if date == td(1feb2020)
replace adp_q3 = 0 if date == td(8feb2020)
replace adp_q3 = 0 if date == td(15feb2020)
replace adp_q3 = 0 if date == td(22feb2020)
replace adp_q3 = 0 if date == td(29feb2020)
replace adp_q3 = 0 if date == td(7mar2020)
replace adp_q3 = -0.03 if date == td(14mar2020)
replace adp_q3 = -0.07 if date == td(21mar2020)
replace adp_q3 = -0.11 if date == td(28mar2020)
replace adp_q3 = -0.14 if date == td(4apr2020)
replace adp_q3 = -0.17 if date == td(11apr2020)
replace adp_q3 = -0.18 if date == td(18apr2020)
replace adp_q3 = -0.17 if date == td(25apr2020)
replace adp_q3 = -0.16 if date == td(2may2020)
replace adp_q3 = -0.15 if date == td(9may2020)
replace adp_q3 = -0.14 if date == td(16may2020)

gen adp_q4 = .
replace adp_q4 = 0 if date == td(1feb2020)
replace adp_q4 = -0.01 if date == td(8feb2020)
replace adp_q4 = 0 if date == td(15feb2020)
replace adp_q4 = 0 if date == td(22feb2020)
replace adp_q4 = 0 if date == td(29feb2020)
replace adp_q4 = -0.01 if date == td(7mar2020)
replace adp_q4 = -0.03 if date == td(14mar2020)
replace adp_q4 = -0.06 if date == td(21mar2020)
replace adp_q4 = -0.09 if date == td(28mar2020)
replace adp_q4 = -0.12 if date == td(4apr2020)
replace adp_q4 = -0.14 if date == td(11apr2020)
replace adp_q4 = -0.14 if date == td(18apr2020)
replace adp_q4 = -0.13 if date == td(25apr2020)
replace adp_q4 = -0.12 if date == td(2may2020)
replace adp_q4 = -0.12 if date == td(9may2020)
replace adp_q4 = -0.11 if date == td(16may2020)

gen adp_q5 = .
replace adp_q5 = 0 if date == td(1feb2020)
replace adp_q5 = 0 if date == td(8feb2020)
replace adp_q5 = 0.01 if date == td(15feb2020)
replace adp_q5 = 0 if date == td(22feb2020)
replace adp_q5 = -0.02 if date == td(29feb2020)
replace adp_q5 = -0.02 if date == td(7mar2020)
replace adp_q5 = -0.02 if date == td(14mar2020)
replace adp_q5 = -0.04 if date == td(21mar2020)
replace adp_q5 = -0.07 if date == td(28mar2020)
replace adp_q5 = -0.09 if date == td(4apr2020)
replace adp_q5 = -0.09 if date == td(11apr2020)
replace adp_q5 = -0.08 if date == td(18apr2020)
replace adp_q5 = -0.07 if date == td(25apr2020)
replace adp_q5 = -0.06 if date == td(2may2020)
replace adp_q5 = -0.05 if date == td(9may2020)
replace adp_q5 = -0.06 if date == td(16may2020)

replace adp_q1 = adp_q1 * 100
replace adp_q2 = adp_q2 * 100
replace adp_q3 = adp_q3 * 100
replace adp_q4 = adp_q4 * 100
replace adp_q5 = adp_q5 * 100

* ADP dates are Saturdays and represent two-week trailing averages. 
* So they are much more representative of the Friday just before the ADP date. 
gen Fridays = date - dow(date) + 5
format Fridays %td

drop date
rename Fridays date

gcollapse adp_q*, by(date)

tempfile adp
save `adp'

restore

merge 1:1 date using `adp', nogen

tw (line emp_combined_q1 date if date <= mdy(5, 16, 2020)  , sort lcolor(oi1)) ///
(line emp_combined_q2 date if date <= mdy(5, 16, 2020) , sort lcolor(oi3)) ///
(line emp_combined_q3 date if date <= mdy(5, 16, 2020) , sort lcolor(oi4)) ///
(line emp_combined_q4 date if date <= mdy(5, 16, 2020) , sort lcolor(oi2)) ///
(connected adp_q1 date if date <= mdy(5, 16, 2020)  , sort color(oi1) lpattern(dash)) ///
(connected adp_q2 date if date <= mdy(5, 16, 2020)  , sort color(oi3) lpattern(dash)) ///
(connected adp_q3 date if date <= mdy(5, 16, 2020)  , sort color(oi4) lpattern(dash)) ///
(connected adp_q4 date if date <= mdy(5, 16, 2020)  , sort color(oi6) lpattern(dash)) ///
(connected adp_q5 date if date <= mdy(5, 16, 2020)  , sort color(oi2) lpattern(dash)), ///
	xlab(`=mdy(1, 15, 2020)' "Jan 15" `=mdy(2, 15, 2020)' "Feb 15" `=mdy(3, 15, 2020)' "Mar 15" ///
		 `=mdy(4, 15, 2020)' "Apr 15" `=mdy(5, 15, 2020)' "May 15") xtitle(" ") ///
	legend(order(1 "Paychex-Intuit Q1 (Bottom)" 2 "Q2" 3 "Q3" 4 "Q4 (Top)" 5 "ADP Q1 (Bottom)" 6 "Q2" 7 "Q3" 8 "Q4" 9 "Q5 (Top)") size(3) col(1) ring(0) pos(7) region(fcolor(white%0) lcolor(white%0)) symxsize(huge)) ///
	${title_`version'} ///
	ylabel(-40 "-40%" -30 "-30%" -20 "-20%" -10 "-10%" 0 "0%", nogrid) yscale(range(-40 5))  ytitle("Change in Employment Relative to January")  ///
	ytitle("Change in Employment (%)" "Relative to January 2020") ///
	xline(`=td(17apr2020)', lpattern(dash) lcolor(black)) ///
	xsc(ra(`=mdy(1, 11, 2020)' `=mdy(5, 17, 2020)')) ///
text(7.5 `=td(17apr2020)' "Apr 17", color(black) size(small))

oi_graph_export "${root}/results/Employment/Employment Tracker vs ADP by Income Quartile", type(${fig_type})

*-------------------------------------------------------------------------------
**# Change in employment: Paychex vs. ADP vs. CPS (May 2020)
*-------------------------------------------------------------------------------

keep if date == td(15may2020) | date == td(17dec2021)
gen period = 1 if date == td(15may2020)
replace period = 2 if date == td(17dec2021)

cap drop month _merge
greshape long emp_combined_q adp_q, i(date period) j(quartile)

* Load CPS file
preserve
project, uses("${root}/data/derived/CPS/CPS by wage quartile.dta")
use "${root}/data/derived/CPS/CPS by wage quartile.dta", clear

gen Fridays = date - dow(date) + 5                                              
format Fridays %td

drop date
rename Fridays date

* Save as tempfile
keep date quartile norm_emp_cps
tempfile cps
save `cps'
restore

merge 1:1 date quartile using `cps', nogen keep(3)

rename (emp_combined_q adp_q) (emp_combined adp)

replace adp = -11 if quartile == 3 // make Quintile 4 into Q3 (drop quintile 3)
replace adp = -6 if quartile == 4  // make Quintile 5 into Q4
drop if quartile == 5

label def quartile 1 "Bottom Quartile" 2 "Second Quartile" 3 "Third Quartile" 4 "Top Quartile"
label val quartile quartile

graph bar emp_combined norm_emp_cps adp  if period == 1 , ///
 over(quartile) bar(1, color(oi1)) bar(2, color(oi3)) bar(3, color(oi2))  /// ///
 ylabel(-40 "-40%" -30 "-30%" -20 "-20%" -10 "-10%" 0 "0%", nogrid) yscale(range(-40)) ytitle("Change in Employment (%)" "from January to May 2020")  ///
 ${title_`version'} legend(order(1 "Paychex-Intuit" 2 "CPS" 3 "ADP") cols(1) position(4) ring(0)) ///
 b1title( "Wage Quartile", size(3.5) color(gs7))

oi_graph_export "${root}/results/Employment/Employment Tracker vs ADP vs CPS by Income Quartile", type(${fig_type})

*-------------------------------------------------------------------------------
**# Change in employment: Paychex vs. CPS (Dec 2021)
*-------------------------------------------------------------------------------

* Import Employment data
project, uses("${root}/data/web/data/Employment - State - Weekly.csv")
import delimited "${root}/data/web/data/Employment - State - Weekly.csv", clear

gen date = mdy(month, day_endofweek, year)
format date %d

keep statefips date emp_incq1 emp_incq2 emp_incq3 emp_incq4

* Rescale
forvalues q = 1/4 {
	replace emp_incq`q' =  emp_incq`q' * 100
}
rename (emp_incq1 emp_incq2 emp_incq3 emp_incq4) (emp_combined_q1 emp_combined_q2 emp_combined_q3 emp_combined_q4)

drop if inlist(statefips, 6, 25, 36)

* Merge population
preserve
project, uses("${root}/data/derived/ACS 2014-2018 5-Year State/ACS 2014-2018 State.dta")
use "${root}/data/derived/ACS 2014-2018 5-Year State/ACS 2014-2018 State.dta", clear

rename (state_fips pop_2014_2018) (statefips pop_2018)
keep statefips pop_2018

tempfile state_pop
save `state_pop'

restore

merge m:1 statefips using `state_pop', keep(3) nogen
gcollapse (mean) emp_combined* [w = pop_2018], by(date)

keep if date == td(17dec2021)
cap drop month _merge
reshape long emp_combined_q adp_q, i(date) j(quartile)

* Load CPS file
preserve
project, uses("${root}/data/derived/CPS/CPS by wage quartile excluding NY CA MA.dta")
use "${root}/data/derived/CPS/CPS by wage quartile excluding NY CA MA.dta", clear

gen Fridays = date - dow(date) + 5                                              
format Fridays %td

drop date
rename Fridays date

* Save as tempfile
keep date quartile norm_emp_cps
tempfile cps
save `cps'
restore

merge 1:1 date quartile using `cps', nogen keep(3)

rename (emp_combined_q ) (emp_combined )
label def quartile 1 "Bottom Quartile" 2 "Second Quartile" 3 "Third Quartile" 4 "Top Quartile"
label val quartile quartile

graph bar emp_combined norm_emp_cps, ///
 over(quartile) bar(1, color(oi1)) bar(2, color(oi3))  ///
 ylabel(-40 "-40%" -30 "-30%" -20 "-20%" -10 "-10%" 0 "0%" 10 "+10%" 20 "+20%" , nogrid) yscale(range(-40)) ytitle("Change in Employment (%)" "from January 2020 to December 2021")  ///
 ${title_`version'} legend(order(1 "Paychex-Intuit" 2 "CPS") cols(1) position(4) ring(0)) ///
 b1title( "Wage Quartile", size(3.5) color(gs7))

oi_graph_export "${root}/results/Employment/Employment Tracker vs CPS by Income Quartile excl NY CA MA - Dec 2021", type(${fig_type})

*-------------------------------------------------------------------------------
* Output numbers
*-------------------------------------------------------------------------------

sum norm_emp_cps if quartile == 1
local q1_cps_dec2021: di %3.1f abs(r(mean))
di `q1_cps_dec2021'

cap erase "${root}/results/paper numbers/`category'/CPS Q1 decline in Dec 2021.yaml"

yamlout using "${root}/results/paper numbers/`category'/CPS Q1 decline in Dec 2021.yaml", ///
	key("q1_cps_dec2021") ///
	comment("CPS Q1 decline in Dec 2021") ///
	value(`q1_cps_dec2021') fmt(%9.1f)

project, creates("${root}/results/paper numbers/`category'/CPS Q1 decline in Dec 2021.yaml")
